# Timing Matters: Climate Policies and Adaptive Resilience
# Robustness Tests and Diagnostics
# D'Amico & Maboudi (2025)

# ============================================================================
# SETUP
# ============================================================================

rm(list = ls())

library(car)
library(dplyr)
library(lmtest)
library(plm)
library(readr)
library(sandwich)
library(tseries)

# Load cleaned data
pdata <- read_csv("pdata_clean.csv")
pdata <- pdata.frame(pdata, index = c("iso3c", "year"))

# ============================================================================
# MODEL SPECIFICATION SELECTION (APPENDIX F)
# ============================================================================

cat("=== MODEL SPECIFICATION SELECTION ===\n\n")

# Baseline specification
predictors_All_Years <- c(
  "adaptation_law_stock", "imports_pGDP", "max_populism", 
  "ROL_perc", "percent_gdp_services", "auton",
  "disaster_count", "ln_gdp", "gdpgrowth_per", "adaptationaid_recipient"
)

formula_str <- paste("lag1_capacity ~", paste(predictors_All_Years, collapse = " + "))
model_formula <- as.formula(formula_str)

# Test alternative specifications

# 1. Two-way fixed effects
model_twoway_fe <- plm(model_formula, data = pdata, 
                       model = "within", effect = "twoways")

# 2. Country fixed effects only
model_country_fe <- plm(model_formula, data = pdata, 
                        model = "within", effect = "individual")

# 3. Time fixed effects only
model_time_fe <- plm(model_formula, data = pdata, 
                     model = "within", effect = "time")

# 4. Random effects
model_re <- plm(model_formula, data = pdata, model = "random")

# 5. Pooled OLS
model_ols <- plm(model_formula, data = pdata, model = "pooling")

# Compare model fit
cat("Model Fit Comparison:\n\n")

fit_comparison <- data.frame(
  Model = c("Two-way FE", "Country FE", "Time FE", "Random Effects", "Pooled OLS"),
  R_squared = c(
    summary(model_twoway_fe)$r.squared[1],
    summary(model_country_fe)$r.squared[1],
    summary(model_time_fe)$r.squared[1],
    summary(model_re)$r.squared[1],
    summary(model_ols)$r.squared[1]
  ),
  Adj_R_squared = c(
    summary(model_twoway_fe)$r.squared[2],
    summary(model_country_fe)$r.squared[2],
    summary(model_time_fe)$r.squared[2],
    summary(model_re)$r.squared[2],
    summary(model_ols)$r.squared[2]
  )
)

print(fit_comparison)

# Hausman test: Fixed vs Random effects
cat("\n=== HAUSMAN TEST (FE vs RE) ===\n")
hausman_test <- phtest(model_time_fe, model_re)
print(hausman_test)

if (hausman_test$p.value < 0.05) {
  cat("\nConclusion: Fixed effects preferred (p < 0.05)\n")
} else {
  cat("\nConclusion: Random effects not rejected\n")
}

# Select preferred model: Time FE
cat("\n=== SELECTED MODEL: TIME FIXED EFFECTS ===\n")
selected_model <- model_time_fe

# ============================================================================
# COMPREHENSIVE DIAGNOSTIC TESTING (APPENDIX G)
# ============================================================================

cat("\n\n=== DIAGNOSTIC TESTING ===\n\n")

# Calculate cluster-robust standard errors
cov_fe <- vcovHC(selected_model, method = "arellano", type = "HC1", cluster = "group")
coef_results <- coeftest(selected_model, vcov = cov_fe)

# 1. Model fit statistics
cat("=== FIT STATISTICS ===\n")
cat("R-squared:", round(summary(selected_model)$r.squared[1]*100, 2), "%\n")
cat("Adjusted R-squared:", round(summary(selected_model)$r.squared[2]*100, 2), "%\n")
cat("F-statistic:", round(summary(selected_model)$fstatistic$statistic, 2), "\n")
cat("Observations:", nrow(selected_model$model), "\n")
cat("Countries:", length(unique(index(selected_model)$iso3c)), "\n")
cat("Time periods:", length(unique(index(selected_model)$year)), "\n\n")

# 2. Residual analysis
residuals_model <- residuals(selected_model)

cat("=== RESIDUAL SUMMARY ===\n")
print(summary(residuals_model))
cat("SD:", round(sd(residuals_model), 6), "\n")
cat("MAE:", round(mean(abs(residuals_model)), 6), "\n")
cat("RMSE:", round(sqrt(mean(residuals_model^2)), 6), "\n\n")

# 3. Normality tests
cat("=== NORMALITY TESTS ===\n")

# Jarque-Bera test
jb_test <- jarque.bera.test(residuals_model)
cat("Jarque-Bera Test:\n")
cat("  Statistic:", round(jb_test$statistic, 2), "\n")
cat("  p-value:", format(jb_test$p.value, scientific = TRUE), "\n")
if (jb_test$p.value < 0.05) {
  cat("  Result: Residuals are NOT normally distributed\n")
} else {
  cat("  Result: Residuals appear normally distributed\n")
}

# Shapiro-Wilk test (on sample if large dataset)
if (length(residuals_model) > 5000) {
  sample_residuals <- sample(residuals_model, 5000)
  shapiro_test <- shapiro.test(sample_residuals)
  cat("\nShapiro-Wilk Test (sample of 5000):\n")
} else {
  shapiro_test <- shapiro.test(residuals_model)
  cat("\nShapiro-Wilk Test:\n")
}
cat("  W:", round(shapiro_test$statistic, 4), "\n")
cat("  p-value:", format(shapiro_test$p.value, scientific = TRUE), "\n\n")

# 4. Heteroscedasticity tests
cat("=== HETEROSCEDASTICITY TESTS ===\n")

bp_test <- bptest(selected_model)
cat("Breusch-Pagan Test:\n")
cat("  Statistic:", round(bp_test$statistic, 2), "\n")
cat("  p-value:", format(bp_test$p.value, scientific = TRUE), "\n")
if (bp_test$p.value < 0.05) {
  cat("  Result: Heteroscedasticity detected\n")
  cat("  Note: Cluster-robust SEs address this issue\n\n")
} else {
  cat("  Result: Homoscedasticity\n\n")
}

# 5. Serial correlation tests
cat("=== SERIAL CORRELATION TESTS ===\n")

# Breusch-Godfrey test
bg_test1 <- bgtest(selected_model, order = 1)
cat("Breusch-Godfrey Test (AR-1):\n")
cat("  Statistic:", round(bg_test1$statistic, 2), "\n")
cat("  p-value:", format(bg_test1$p.value, scientific = TRUE), "\n")

bg_test2 <- bgtest(selected_model, order = 2)
cat("\nBreusch-Godfrey Test (AR-2):\n")
cat("  Statistic:", round(bg_test2$statistic, 2), "\n")
cat("  p-value:", format(bg_test2$p.value, scientific = TRUE), "\n\n")

# 6. Multicollinearity
cat("=== MULTICOLLINEARITY (VIF) ===\n")

lm_model <- lm(selected_model$formula, data = selected_model$model)
vif_values <- vif(lm_model)

vif_results <- data.frame(
  Variable = names(vif_values),
  VIF = round(vif_values, 2),
  Status = case_when(
    vif_values > 10 ~ "HIGH",
    vif_values > 5 ~ "MODERATE", 
    TRUE ~ "LOW"
  )
)
print(vif_results)

cat("\nSummary:\n")
cat("Variables with HIGH VIF (>10):", sum(vif_values > 10), "\n")
cat("Variables with MODERATE VIF (5-10):", sum(vif_values > 5 & vif_values <= 10), "\n")
cat("Variables with LOW VIF (<5):", sum(vif_values <= 5), "\n\n")

# 7. Standard vs Cluster-robust SEs comparison
cat("=== STANDARD vs CLUSTER-ROBUST SEs ===\n")

standard_ses <- summary(selected_model)$coefficients[, "Std. Error"]
cluster_ses <- sqrt(diag(cov_fe))

se_comparison <- data.frame(
  Variable = names(standard_ses),
  Standard_SE = round(standard_ses, 6),
  Cluster_SE = round(cluster_ses, 6),
  Ratio = round(cluster_ses / standard_ses, 2)
)
print(se_comparison)

cat("\nAverage SE ratio (Cluster/Standard):", round(mean(se_comparison$Ratio), 2), "\n")
cat("Variables with >50% increase:", sum(se_comparison$Ratio > 1.5), "\n")
cat("Variables with >100% increase:", sum(se_comparison$Ratio > 2.0), "\n\n")

# ============================================================================
# GRANGER CAUSALITY TEST (APPENDIX H)
# ============================================================================

cat("=== PANEL GRANGER CAUSALITY TEST ===\n\n")

# Prepare data
pdata_clean <- pdata[complete.cases(pdata$capacity, pdata$adaptation_law_stock), ]

# Dumitrescu-Hurlin test
panel_granger <- pgrangertest(capacity ~ adaptation_law_stock, 
                               data = pdata_clean, order = 1)

cat("Null Hypothesis: Adaptation laws do NOT Granger-cause adaptive capacity\n")
cat("Alternative: Adaptation laws Granger-cause adaptive capacity\n\n")

print(panel_granger)

if (panel_granger$statistic[2] < 0.05) {
  cat("\nConclusion: Strong evidence of Granger causality (p < 0.001)\n")
  cat("Interpretation: Adaptation laws temporally precede changes in capacity\n")
} else {
  cat("\nConclusion: No evidence of Granger causality\n")
}

# ============================================================================
# SAVE DIAGNOSTIC RESULTS
# ============================================================================

diagnostics <- list(
  fit_comparison = fit_comparison,
  hausman_test = hausman_test,
  normality_tests = list(jarque_bera = jb_test, shapiro_wilk = shapiro_test),
  heteroscedasticity = bp_test,
  serial_correlation = list(ar1 = bg_test1, ar2 = bg_test2),
  vif = vif_results,
  se_comparison = se_comparison,
  granger_causality = panel_granger
)

saveRDS(diagnostics, "diagnostic_results.rds")

cat("\n=== ROBUSTNESS TESTING COMPLETE ===\n")
cat("Results saved to diagnostic_results.rds\n")
